
libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';




*This program constructs Monthly Determinants which is referenced in program #4 (Determinants of Coverage).;


data crsp_monthly;
set MYLIB.CRSP17_21D;
*let's limit to common stocks;
if 10<=shrcd<=11;
year = year(date);
if year >=2018;
month = month(date);
cum_month = (year -2018)* 12 + month;

keep permno ticker shrcd YEAR MONTH cum_month;
run;


*LIMIT TO ONE OBSERVATION PER MONTH;

proc sort nodupkey data=crsp_monthly;
by ticker cum_month;
run;



*create a monthly measure of total DD posts;


data wsb_posts;
set mylib.dd_posts18_21q2;
year = year(day0);
month = month(day0);
cum_month = (year -2018)* 12 + month;
keep cum_month ticker purchase;
run;

proc sort data=wsb_posts;
by ticker cum_month;
run;
proc univariate noprint data=wsb_posts;
by ticker cum_month;
var purchase;
output out=wsb_posts_monthly n=total_posts sum=buy_posts;
run;
data wsb_posts_monthly;
set wsb_posts_monthly;
sell_posts = total_posts - buy_posts;
run;




proc sort data=crsp_monthly;
by ticker cum_month;
run;
data wsb_posts_monthly2;
merge crsp_monthly wsb_posts_monthly;
by ticker cum_month;
if total_posts = . then total_posts =0;
if buy_posts = . then buy_posts =0;
if sell_posts = . then sell_posts =0;
if ticker = '' then delete;
run;



data crsp_monthly;
set mylib.crsp17_21d;
year = year(date);
if year >=2017;
month = month(date);
cum_month = (year -2018)* 12 + month;
turn = vol/shrout;
abn_ret = ret - vwretd;
keep ticker permno ticker shrcd year cum_month turn abn_ret;
run;
proc sort data=crsp_monthly;
by ticker permno year;
run;
proc univariate noprint  data=crsp_monthly;
by ticker permno year;
var turn;
output out=ave_turn mean=ave_turn;
run;
proc univariate noprint data=crsp_monthly;
by ticker permno year;
var abn_ret;
output out=volatility std=ret_vol;
run;
 
data ave_turn;
merge ave_turn  volatility;
by ticker permno year;
run;

data size;
set mylib.crsp17_21d;
year = year(date);
if year >=2017;
month = month(date);
if month =12;
cum_month = (year -2018)* 12 + month;
size = abs(prc * shrout);
keep ticker permno year  date size;
run;
proc sort data=size;
by ticker permno year descending date;
run;
proc sort nodupkey data=size;
by  ticker permno year;
run;

data ave_turn;
merge ave_turn size;
by ticker permno year;
run;




*******************************************************************************;
*now let's bring in compustat data on book-to-market and profitability;
*******************************************************************************;

data ratios;
set mylib.financial_ratios;
month = month (public_date);
year = year(public_date);
if month =12;
run;
proc sort data=ratios;
by ticker permno year;
run;

data ave_turn2;
merge ave_turn ratios;
by ticker permno year;
if size = . then delete;
run;




*******************************************************************************;
*bring in data on institutional ownership from 13f;
*******************************************************************************;



data INST;
set MYLIB.INST_OWN;
INST_OWNERSHIP = INSTOWN_PERC;
MONTH = MONTH(RDATE);
*WILL FOCUS ON MONTH 3 RATHER THAN 12 BECAUSE Q4 OF 2020 SEEMS UNRELIABLY;
IF MONTH =3;
YEAR = YEAR(RDATE);
KEEP TICKER YEAR INST_OWNERSHIP;
run;




PROC SORT DATA=AVE_TURN2;
BY TICKER YEAR;
RUN;
PROC SORT DATA=INST;
BY TICKER YEAR;
RUN;
DATA AVE_TURN2;
MERGE AVE_TURN2 INST;
BY TICKER YEAR;
IF SIZE = . THEN DELETE;
IF INST_OWNERSHIP >1 THEN INST_OWNERSHIP =1;
RUN;





*******************************************************************************;
*bring in data on analyst coverage from IBES
*******************************************************************************;


DATA IBES_COVERAGE;
SET MYLIB.IBES_DETAIL17_20;
YEAR=YEAR(ANNDATS);
TICKER = OFTIC;
IF TICKER = '' THEN DELETE;
KEEP OFTIC TICKER CUSIP ANALYS YEAR;
RUN;
PROC SORT NODUPKEY DATA=IBES_COVERAGE;
BY TICKER YEAR ANALYS;
RUN;
PROC UNIVARIATE NOPRINT DATA=IBES_COVERAGE;
BY TICKER YEAR;
VAR ANALYS;
OUTPUT OUT=IBES_COVERAGE2 N=IBES_COVERAGE;
RUN;

DATA AVE_TURN2;
MERGE AVE_TURN2 IBES_COVERAGE2;
BY TICKER YEAR;
RUN;




*******************************************************************************;
*bring in media coverage from Bloomberg
*******************************************************************************;

PROC SORT DATA=MYLIB.MEDIA_COVERAGE_RANK;
BY TICKER YEAR;
RUN;
PROC UNIVARIATE NOPRINT DATA=MYLIB.MEDIA_COVERAGE_RANK;
BY TICKER YEAR;
VAR VALUE;
OUTPUT OUT= MEDIA MEAN=AVE_MEDIA SUM=TOTAL_MEDIA n=OBS;
RUN;

DATA AVE_TURN2;
MERGE AVE_TURN2 MEDIA;
BY TICKER YEAR;
IF SIZE =  . THEN DELETE;
DROP AVE_MEDIA OBS;
RUN;
data ave_turn3;
set ave_turn2;
CONTROL_YEAR = YEAR;;
DROP YEAR;
run;

proc sql;
create table wsb_posts_monthly3 as
select *
from wsb_posts_monthly2, ave_turn3
where wsb_posts_monthly2.ticker = ave_turn3.ticker and wsb_posts_monthly2.year =ave_turn3.control_year+1;
quit;


proc sort data=wsb_posts_monthly2;
by ticker year;
run;
proc sort data=wsb_posts_monthly3;
by ticker year;
run;
data wsb_posts_monthly2;
merge wsb_posts_monthly2 wsb_posts_monthly3;
by ticker year;
run;



*need to add a few more variables:

1) breadth of ownerhsp (cshr)
2) short interest
3) recent ipo (3 or 6)
4) past returns over month and 2-12 months;

proc sort data=wsb_posts_monthly2;
by ticker date;
run;
data wsb_posts_monthly3;

set wsb_posts_monthly2;
count +1;
by ticker;
if first.ticker then count =1;
run;

data wsb_posts_monthly3;
set wsb_posts_monthly3;
if count <=6 then ipo6 =1; else ipo6 =0;
if count <=3 then ipo3 =1; else ipo3 =0;
if cum_month <=6 then ipo6 = .;
if cum_month <=6 then ipo3 = .;
run;



*** ADD RETURN OVER THE PRIOR 1 MONTH AND 2 TO 12 MONTHS;

data RET;
set mylib.crsp17_21d;
year = year(date);
if year >=2015;
month = month(date);
cum_month = (year -2018)* 12 + month;
LN_RET = LOG(RET +1);
keep  ticker  date RET LN_RET CUM_MONTH;
run;
PROC SORT DATA=RET;
BY TICKER CUM_MONTH;
RUN;
PROC UNIVARIATE NOPRINT DATA=RET;
BY TICKER CUM_MONTH;
VAR LN_RET;
OUTPUT OUT=RETM SUM =LN_RET;
RUN;
DATA RETM;
SET RETM;
RETM = EXP(LN_RET)-1;
IF TICKER = '' THEN DELETE;
RUN;
PROC SORT NODUPKEY DATA=RETM;
BY TICKER CUM_MONTH;
RUN;

data mom;
set RETM;
by TICKER;
count +1;
if first.TICKER then count =1;

l1_ret =lag(RETM);
l2_ret =lag2(RETM);
l3_ret =lag3(RETM);
l4_ret =lag4(RETM);
l5_ret =lag5(RETM);
l6_ret =lag6(RETM);
l7_ret =lag7(RETM);
l8_ret =lag8(RETM);
l9_ret =lag9(RETM);
l10_ret =lag10(RETM);
l11_ret =lag11(RETM);
l12_ret =lag12(RETM);
if count <=1 then l1_ret = .;
if count <=2 then l2_ret = .;
if count <=3 then l3_ret = .;
if count <=4 then l4_ret = .;
if count <=5 then l5_ret = .;
if count <=6 then l6_ret = .;
if count <=7 then l7_ret = .;
if count <=8 then l8_ret = .;
if count <=9 then l9_ret = .;
if count <=10 then l10_ret = .;
if count <=11 then l11_ret = .;
if count <=12 then l12_ret = .;
mom1 = l1_ret;
mom2_12 = (1 +l2_ret) * (1 + l3_ret) * (1+l4_ret) * (1+l5_ret) * (1+l6_ret) * (1+l7_ret) * (1+l8_ret) * (1+l9_ret) * (1+l10_ret) * (1+l11_ret) * (1+l12_ret) -1;
keep TICKER cum_month mom1 mom2_12;
run;


PROC SQL;
CREATE TABLE wsb_posts_monthly3B AS
SELECT *
FROM wsb_posts_monthly3, mom
WHERE wsb_posts_monthly3.TICKER = MOM.TICKER AND wsb_posts_monthly3.CUM_MONTH = MOM.CUM_MONTH;
QUIT;

proc sort nodupkey data=wsb_posts_monthly3B;
by ticker cum_month;
run;
data wsb_posts_monthly3B;
set wsb_posts_monthly3B;
tracker = _n_;
run;


data comp_variables;
set MYLIB.COMP16_20;;
*ADD BY gvkey SO THAT I can set lagged values equal to missing when they are the first gvkey;
BY gvkey;
*variables will always be estimated from the previous year of fiscal-year data;
RET_year_ADJ = fyear+1;



********* CODE BELOW IS FOR CONSUCTION OF BOOK EQUITY *********;


book_equity = CEQ;


********* CODE BELOW IS FOR profitability  *********;

*note  - I don't have data on dividends on preferred so will exclude (probably explains why my results are a little differnt for profitability);
*if no info on txdi will set equal to 0;
if txdi = . then txdi =0;
l1_txdi = lag(txdi);
if first.gvkey then l1_txdi = 0;


prof = (ib+l1_txdi)/book_equity;
pos_prof = prof;
if . < prof <0 then pos_prof =0;
if . <prof <0 then neg_prof =1;
if prof >0 then neg_prof =0;


***let's keep key variables;
*note - lpermno is only avaiable in the crsp-compustat merged file;
if GVKEY = . then delete;
TICKER = TIC;
keep gvkey fyear RET_year_ADJ  TICKER   prof pos_prof neg_prof cshr;

run;



DATA COMP_VARIABLES2;
SET COMP_VARIABLES;
PROF_OLD = PROF;
POS_PROF_OLD = POS_PROF;
NEG_PROF_OLD = NEG_PROF;
keep gvkey fyear RET_year_ADJ  TICKER   prof_OLD pos_prof_OLD neg_prof_OLD cshr;

run;

PROC SQL;
CREATE TABLE wsb_posts_monthly3C AS
SELECT *
FROM wsb_posts_monthly3B, COMP_VARIABLES2
WHERE wsb_posts_monthly3B.TICKER = COMP_VARIABLES2.TICKER AND wsb_posts_monthly3B.YEAR = COMP_VARIABLES2.RET_YEAR_ADJ;
RUN;



proc sort data=wsb_posts_monthly3B;
by tracker;
run;
proc sort nodupkey data=wsb_posts_monthly3c;
by tracker;
run;
data wsb_posts_monthly3B;
merge wsb_posts_monthly3B wsb_posts_monthly3c;
by tracker;
run;


*add in informaiton on short interest;


libname mylib2 'G:\Dropbox\Short Interest';
data short;
set mylib2.short2017_2022;
year = year(datadate);
month = month(datadate);
ticker = tic;
run;


data csho;
set mylib2.csho22;
year = year(datadate);
month = month(datadate);
run;

proc sql;
create table short2 as
select *
from short, csho
where short.gvkey = csho.gvkey and short.year = csho.year and short.month = csho.month;
quit;

data short2;
set short2;
percent_short = SHORTINT/cshom;
if percent_short >1 then percent_short =1;
percent_short_rank = percent_short;
run;
proc sort data=short2;
by ticker year month;
run;
proc univariate noprint data=short2;
by ticker year month;
var percent_short;
output out=short3 mean=percent_short;
run;


data short3;
set short3;
ind_var_cum_month = (year - 2018) * 12 + month;
IF TICKER = '' THEN DELETE;
IF PERCENT_SHORT >0.10 THEN HEAVY_SHORT =1;
ELSE HEAVY_SHORT =0;
run;


proc sql;
create table wsb_posts_monthly3D as
select *
from wsb_posts_monthly3b, short3
where wsb_posts_monthly3b.ticker = short3.ticker and wsb_posts_monthly3b.CUM_MONTH = short3.ind_var_cum_month+1;
quit;



proc sort data=wsb_posts_monthly3B;
by tracker;
run;
proc sort nodupkey data=wsb_posts_monthly3D;
by tracker;
run;
data wsb_posts_monthly3B;
merge wsb_posts_monthly3B wsb_posts_monthly3D;
by tracker;
run;


*NOW LET'S CLEAN UP THE SAMPLE;

*START BY DELETING OBSERVATIONS WHER VALUES ARE MISISNG AND IT'S NOT A RECENT IPO;

DATA TABLE2;
SET wsb_posts_monthly3B;
ln_total_post = log (1+total_posts);
ln_buy_post = log (1+buy_posts);
ln_sell_post = log (1+sell_posts);
ln_ave_turn = log (1+ave_turn);
ln_vol = log(ret_vol);
ln_size = log(size);
ln_bm = log(bm);
if ibes_coverage = . then ibes_coverage =0;
ln_ibes_coverage = log (1+ibes_coverage);
ln_inst_own = log(INST_OWNERSHIP);
IF TOTAL_MEDIA = . THEN TOTAL_MEDIA =0;
IF TOTAL_MEDIA = 0 THEN MISSING_MEDIA =1;
ELSE MISSING_MEDIA =0;
ln_media = log (1+total_media);
IF BM <=0 THEN NEG_BM =1;
ELSE NEG_BM =0;
if neg_bm = 1 then ln_bm =0;

if cshr = . then cshr =0;
ln_cshr = log(1+cshr);




LN_PROF = LOG(PROF_OLD);
IF . <PROF_OLD <=0 THEN LN_PROF =0;
IF . <PROF_OLD <0 THEN NEG_PROF_OLD =1;
ELSE NEG_PROF_OLD =0;




if ipo6 =1  then ln_size =0;
if ipo6 =1  then ln_inst_own =0;
if ipo6 =1  then ln_vol =0;
if ipo6 =1  then ln_ave_turn =0;

if ipo6 =1  then ln_ibes_coverage =0;
if ipo6 =1  then pos_roa =0;
if ipo6 =1  then neg_roa =0;
if ipo6 =1  then LN_PROF =0;
if ipo6 =1  then NEG_PROF_OLD =0;
if ipo6 =1  then ln_media =0;
if ipo6 =1  then LN_BM =0;
if ipo6 =1 then mom1 = 0;
if ipo6 =1 then mom2_12 = 0;

if ipo6 = . then ipo6 =0;
if heavy_short = . then heavy_short =0;


if ln_cshr = . then flag =1;
if ln_size = . then flag =1;
if ln_vol = . then flag =1;
if ln_ave_turn = . then flag =1;
if mom1 = . then flag =1;
if ln_ibes_coverage = . then flag =1;
if ln_inst_own = . then flag =1;
if mom2_12 = . then flag =1;

if 7<=cum_month<=42;

RUN;

*winsorize continous dependent variables;


%macro winsor(dsetin=TABLE2, dsetout=TABLE2B, byvar=cum_month, 
vars= ln_ave_turn ln_vol ln_size ln_bm ln_ibes_coverage ln_inst_own ln_cshr mom1 mom2_12 LN_PROF  ln_media percent_short ,
type=winsor, pctl=1 99);
  
%if &dsetout = %then %let dsetout = &dsetin;
     
%let varL=;
%let varH=;
%let xn=1;
  
%do %until ( %scan(&vars,&xn)= );
    %let token = %scan(&vars,&xn);
    %let varL = &varL &token.L;
    %let varH = &varH &token.H;
    %let xn=%EVAL(&xn + 1);
%end;
  
%let xn=%eval(&xn-1);
  
data xtemp;
    set &dsetin;
    run;
  
%if &byvar = none %then %do;
  
    data xtemp;
        set xtemp;
        xbyvar = 1;
        run;
  
    %let byvar = xbyvar;
  
%end;
  
proc sort data = xtemp;
    by &byvar;
    run;
  
proc univariate data = xtemp noprint;
    by &byvar;
    var &vars;
    output out = xtemp_pctl PCTLPTS = &pctl PCTLPRE = &vars PCTLNAME = L H;
    run;
  
data &dsetout;
    merge xtemp xtemp_pctl;
    by &byvar;
    array trimvars{&xn} &vars;
    array trimvarl{&xn} &varL;
    array trimvarh{&xn} &varH;
  
    do xi = 1 to dim(trimvars);
  
        %if &type = winsor %then %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then trimvars{xi} = trimvarl{xi};
              if (trimvars{xi} > trimvarh{xi}) then trimvars{xi} = trimvarh{xi};
            end;
        %end;
  
        %else %do;
            if not missing(trimvars{xi}) then do;
              if (trimvars{xi} < trimvarl{xi}) then delete;
              if (trimvars{xi} > trimvarh{xi}) then delete;
            end;
        %end;
  
    end;
    drop &varL &varH xbyvar xi;
    run;
  
%mend winsor;
%winsor(dsetin=TABLE2, dsetout=TABLE2B, byvar=cum_month, 
vars= ln_ave_turn ln_vol ln_size ln_bm ln_ibes_coverage ln_inst_own ln_cshr mom1 mom2_12 LN_PROF  ln_media percent_short,
type=winsor, pctl=1 99);



*adding in some more variables:

#1) max ;


data crsp_monthly;
set mylib.crsp17_21d;
year = year(date);
if year >=2018;
month = month(date);
cum_month = (year -2018)* 12 + month +1;

keep permno ticker shrcd YEAR MONTH cum_month ret;
run;


proc sort data=crsp_monthly;
by ticker cum_month;
run;
proc univariate noprint data=crsp_monthly;
by ticker cum_month;
var ret;
output out=max max=max_ret;
run;

proc sort data=table2b;
by ticker cum_month;
run;
data table2b;
merge table2b max;
by ticker cum_month;
if permno = . then delete;
max_ret_rank = max_ret;
run;


proc sort data=table2b;
by cum_month;
run;
proc rank data=table2b out=table2b groups =10;
by cum_month;
var max_ret_rank;
run;

data table2b;
set table2b;
*will define top quintile (i.e. a rank of 8 or 9) as high_max;
if max_ret_rank >=8 then high_max =1;
else high_max =0;
run;






*NOW LET'S ADD IN NON-RESEARCH POSTS;

Data posts;
set mylib.NON_RESEARCH_POSTS;
hour = hour(created_utc);
minute = minute(created_utc);
if hour > 16 then next_day =1;
else next_day =0;
post_date = datepart(created_utc);

adj_date =  post_date + next_day;
weekday = weekday(adj_date);
tradedate = adj_date;
if weekday = 7 then tradedate = adj_date +2;
if weekday = 1 then tradedate = adj_date +1;

format post_date yymmdd10.;
format tradedate yymmdd10.;

year = year(tradedate);
month = month(tradedate);
day = day(tradedate);


cum_month = (year -2018)* 12 + month;

IF 7<=CUM_MONTH<=42;
comments  = num_comments * 1;
run;


proc sort data=posts;
by author flair definite_ticker tradedate descending   comments;
run;
proc means data=posts;
var comments;
run;

*delete duplicate posts by same author and date - 56503;
proc sort nodupkey data=posts out=posts2;
by author flair definite_ticker tradedate;
run;


*focus on subset of posts that are very clearly non-research related (N = 20,269);




data posts2;
set posts2;

if flair = 'Gain' then keep =1;
if flair = 'Loss' then keep =1;
if flair = 'Meme' then keep =1;
if flair = 'Shitpost' then keep =1;
if flair = 'News' then keep =1;

if keep =1;

keep author flair definite_ticker tradedate comments cum_month;
run;




data daily_panelq2_2021;
set mylib.daily_panel_rfs;
keep ticker permno  DATE ;
run;


*13,341 posts;




proc sql;
create table posts3 as
select *
from posts2, daily_panelq2_2021
where posts2.definite_ticker = daily_panelq2_2021.ticker and posts2.tradedate =  daily_panelq2_2021.date;
quit;


PROC SORT DATA=POSTS3;
BY TICKER tradedate cum_month;
run;
proc univariate noprint data=posts3;
by ticker tradedate cum_month;
var comments;
output out=non_research_posts n=non_research_posts;
run;

*will create monthly measure of Seeking Alpha coverage;


DATA SA;
*CREATED IN [SA_SAMPLE_2_3_22];
SET MYLIB.SEEKING_ALPHA2b;
cum_month = (year -2018)* 12 + month;
day = day(date);
if cum_month = 37 and day < 13 then delete;
run;
KEEP cum_month tradedate TICKER;
RUN;
PROC SORT DATA=SA;
BY  TICKER tradedate cum_month;
RUN;
PROC UNIVARIATE NOPRINT DATA=SA;
BY  TICKER tradedate cum_month;
VAR cum_month;
OUTPUT OUT=SA_COVERAGE N=SA_COVERAGE;
RUN;

proc univariate data=sa_coverage;
var sa_coverage;
run;
data sa_coverage;
set sa_coverage;
sa_coverage2 = sa_coverage;
if sa_coverage > 5 then sa_coverage2 =5;
run;
proc sort data=sa_coverage;
by ticker cum_month;
run;
proc univariate noprint data=sa_coverage;
by ticker cum_month;
var sa_coverage sa_coverage2;
output out=sa_coverage sum=sa_coverage sa_coverage2;
run;

proc sort data=table2b;
by ticker cum_month;
run;


data t2;
merge table2b  non_research_posts sa_coverage;
by ticker cum_month;

*will winsorize research coverage at 99th percentile;


sa_coverage2 = sa_coverage;
if sa_coverage2 >5 then sa_coverage2 =5;
non_research_posts2 = non_research_posts;
if non_research_posts2 >22 then non_research_posts2 =22;
total_dd_posts = total_posts;
total_dd_posts2 = total_posts;
total_dd_posts2 = total_dd_posts;
if total_dd_posts2 >6 then total_dd_posts2 = 6;


if sa_coverage = . then sa_coverage =0;
if sa_coverage2 = . then sa_coverage2 =0;


if total_dd_posts = . then total_dd_posts =0;
if total_dd_posts2 = . then total_dd_posts2 =0;
if non_research_posts = . then non_research_posts =0;
if non_research_posts2 = . then non_research_posts2 =0;
if 7<=cum_month<=42;
if cum_month <=36 then post_gme =0;
*if cum_month = 37 then post_gme =.;
if cum_month >=37 then post_gme =1;

log_sa_coverage = log (1+sa_coverage)*100;
log_sa_coverage2 = log (1+sa_coverage2)*100;

log_dd_posts = log (1+total_dd_posts)*100;
log_dd_posts2 = log (1+total_dd_posts2)*100;


log_non_research_posts = log (1+non_research_posts)*100;
log_non_research_posts2 = log (1+non_research_posts2)*100;


if ln_inst_own = . then delete;
if mom2_12 = . then delete;
if ln_vol = . then delete;
if mom1 =  . then delete;
run;


*standarize continous measures;

proc standard data=t2 out=table2b_std std =1;
var LN_INST_OWN  ln_cshr   ln_size ln_bm ln_vol ln_ave_turn mom1 mom2_12     LN_MEDIA ln_ibes_coverage max_ret  ;
run;




data TABLE2B_STD;
set table2b_std;

*drop Janary 2021 since it spance both pre and post gme;

if cum_month = 37 then delete;

*add interactions for the post-gme period;


if max_ret_rank >=9 then very_high_max =1;
else very_high_max =0;

ln_inst_own21 = ln_inst_own * post_gme;
ln_size21 = ln_size * post_gme;
ln_bm21 = ln_bm * post_gme;
ln_vol21 = ln_vol * post_gme;
ln_ave_turn21 = ln_ave_turn * post_gme;
mom1_21 = mom1 * post_gme;
mom2_12_21 = mom2_12 * post_gme;
ln_media21 = ln_media * post_gme;
heavy_short21 = heavy_short * post_gme;
ipo6_21 = ipo6 * post_gme;
neg_bm21 = neg_bm * post_gme;
ln_cshr21 = ln_cshr * post_gme;
missing_media21 = missing_media * post_gme;
ln_ibes_coverage21 = ln_ibes_coverage * post_gme;
high_max21 = high_max * post_gme;
very_high_max21 = very_high_max * post_gme;

*add variables for GME_AMC_FLAG;

if ticker = 'GME' then gme_amc_flag =1;
if ticker = 'AMC' then gme_amc_flag =1;
if gme_amc_flag = . then gme_amc_flag =0;


gme_amc_flag_21 =  gme_amc_flag * post_gme;

if ticker = 'GME' then gme =1; else gme =0;
if ticker = 'AMC' then amc =1; else amc =0;

gme_post = gme * post_gme;
amc_post = amc * post_gme;



run;


PROC MEANS DATA=TABLE2B_STD;
VAR LOG_DD_POSTS LOG_DD_POSTS2  LN_INST_OWN LN_INST_OWN21 
ln_cshr  ln_cshr21  ln_size ln_size21 ln_bm ln_bm21  NEG_BM NEG_BM21 ln_vol ln_vol21 high_max high_max21 ln_ave_turn ln_ave_turn21 mom1 mom1_21 mom2_12   mom2_12_21     LN_MEDIA LN_MEDIA21
ln_ibes_coverage ln_ibes_coverage21 HEAVY_SHORT HEAVY_SHORT21 IPO6 ipo6_21 MISSING_MEDIA MISSING_MEDIA21 gme amc  gme_post amc_post ;
RUN;


*SAVE THE FINAL DATASET;

proc standard data=table2b_std out=table2b_std2 std=1;
*by cum_month;
var log_sa_coverage log_sa_coverage2 log_dd_posts log_dd_posts2 log_non_research_posts log_non_research_posts2;
run;

DATA MYLIB.MONTHLY_DETERMINANTS;
SET TABLE2B_STD2;  

KEEP TICKER CUM_MONTH GME_AMC_FLAG LN_INST_OWN  ln_cshr   ln_size ln_bm ln_vol ln_ave_turn mom1 mom2_12     LN_MEDIA ln_ibes_coverage HEAVY_SHORT IPO6 NEG_BM MISSING_MEDIA
LN_INST_OWN21  ln_cshr21   ln_size21 ln_bm21 ln_vol21 ln_ave_turn21 mom1_21 mom2_12_21     LN_MEDIA21 ln_ibes_coverage21 HEAVY_SHORT21 IPO6_21 NEG_BM21 MISSING_MEDIA21
 high_max high_max21 very_high_max very_high_max21


log_sa_coverage log_sa_coverage2 log_dd_posts log_dd_posts2 log_non_research_posts log_non_research_posts2 gme_amc_flag gme_amc_flag_21 gme amc gme_post amc_post;
RUN;

proc sort data=table2b_std2;
by cum_month;
run;

proc glm data=table2b_std2;
absorb cum_month;
model log_sa_coverage2 =   LN_INST_OWN LN_INST_OWN21 
ln_cshr  ln_cshr21  ln_size ln_size21 ln_bm ln_bm21  NEG_BM NEG_BM21 ln_vol ln_vol21 high_max high_max21 ln_ave_turn ln_ave_turn21 mom1 mom1_21 mom2_12   mom2_12_21     LN_MEDIA LN_MEDIA21
ln_ibes_coverage ln_ibes_coverage21 HEAVY_SHORT HEAVY_SHORT21 IPO6 ipo6_21 MISSING_MEDIA MISSING_MEDIA21 gme_amc_flag gme_amc_flag_21 ;
quit;
